Data Analysis Using R

Functions

  • upset_violin_plot to create violin + UpSet plots for AST measurements and fluoroquinolone resistance mechanisms
# create violin + UpSet plots for AST measurements and fluoroquinolone resistance mechanisms
upset_violin_plot <- function(df, x, y, z) {
# df - dataframe; x - Flq resistance mechanism column; y - Laboratory typing method column
# z - Testing.standard
  require(dplyr, ggplot2, ggupset)
  
  # checks if lab typing method is MIC or disk diffusion 
  if (grepl('MIC.*$', df[y])) {
    # checks if testing standard is EUCAST or CLSI
    if (grepl('EUCAST', df[z])) {
      df %>%
        group_by(strain, Measurement, Resistance.phenotype) %>% 
        # !! sym() allows one to pass a column name as an argument
        summarize(res_mech_list = list(!! sym(x))) %>%
        ggplot(aes(res_mech_list, Measurement)) + geom_violin() + geom_count(aes(colour = Resistance.phenotype)) + 
        # EUCAST cut-off of 1.0 mg/L is resistant
        geom_hline(aes(yintercept = 1.0, linetype = "resistant"), alpha = 0.6, color = "black") +
        # EUCAST cut-off of 0.25 mg/L is susceptible
        geom_hline(aes(yintercept = 0.25, linetype = "susceptible"), alpha = 0.6, color = "black") +
        labs(x = "Fluoroquinolone resistance determinants", y = "Ciprofloxacin MIC measurement (mg/L)", 
             size = "Number of strains", linetype = "MIC breakpoints", colour = "Phenotype") +
        # convert y-axis to log2 scale and labels to 3 dp
        scale_y_continuous(trans = log2_trans(), breaks = 2^(-5:9), labels = function(x) round(as.numeric(x), digits = 3)) +
        scale_linetype_manual(values = c("solid", "dotted")) +
        scale_x_upset() +
        theme_combmatrix(combmatrix.panel.line.size = 0.8)
    } else {
      df %>%
        group_by(strain, Measurement, Resistance.phenotype) %>% 
        # !! sym() allows one to pass a column name as an argument
        summarize(res_mech_list = list(!! sym(x))) %>%
        ggplot(aes(res_mech_list, Measurement)) + geom_violin() + geom_count(aes(colour = Resistance.phenotype)) + 
        # CLSI cut-off of 1.0 mg/L is resistant
        geom_hline(aes(yintercept = 1.0, linetype = "resistant"), alpha = 0.6, color = "black") +
        # CLSI cut-off of 0.25 mg/L is susceptible
        geom_hline(aes(yintercept = 0.25, linetype = "susceptible"), alpha = 0.6, color = "black") +
        labs(x = "Fluoroquinolone resistance determinants", y = "Ciprofloxacin MIC measurement (mg/L)", 
             size = "Number of strains", linetype = "MIC breakpoints", colour = "Phenotype") +
        # convert y-axis to log2 scale and labels to 3 dp
        scale_y_continuous(trans = log2_trans(), breaks = 2^(-5:9), labels = function(x) round(as.numeric(x), digits = 3)) +
        scale_linetype_manual(values = c("solid", "dotted")) +
        scale_x_upset() +
        theme_combmatrix(combmatrix.panel.line.size = 0.8)
    }
  } else {
    if (grepl('EUCAST', df[z])) {
      df %>%
        group_by(strain, Measurement, Resistance.phenotype) %>% 
        summarize(res_mech_list = list(!! sym(x))) %>%
        ggplot(aes(res_mech_list, Measurement)) + geom_violin() + geom_count(aes(colour = Resistance.phenotype)) + 
        # EUCAST cut-off of 21mm is resistant
        geom_hline(aes(yintercept = 21, linetype = "resistant"), alpha = 0.6, color = "black") +
        # EUCAST cut-off of 25mm is susceptible
        geom_hline(aes(yintercept = 25, linetype = "susceptible"), color = "black") +
        labs(title = "Fluoroquinolone resistance determinant combinations\nat various disk diffusion measurements", x = 
               "Fluoroquinolone resistance determinants", y = "Ciprofloxacin disk diffusion measurement (mm)", 
             size = "Number of strains", linetype = "Disk diffusion breakpoints", colour = "Phenotype") +
        scale_linetype_manual(values = c("solid", "dotted")) +
        scale_x_upset() +
        theme_combmatrix(combmatrix.panel.line.size = 0.8)
    } else {
      df %>%
        group_by(strain, Measurement, Resistance.phenotype) %>% 
        summarize(res_mech_list = list(!! sym(x))) %>%
        ggplot(aes(res_mech_list, Measurement)) + geom_violin() + geom_count(aes(colour = Resistance.phenotype)) + 
        # CLSI cut-off of 21mm is resistant
        geom_hline(aes(yintercept = 21, linetype = "resistant"), alpha = 0.6, color = "black") +
        # CLSI cut-off of 26mm is susceptible
        geom_hline(aes(yintercept = 26, linetype = "susceptible"), color = "black") +
        labs(title = "Fluoroquinolone resistance determinant combinations\nat various disk diffusion measurements", x = 
               "Fluoroquinolone resistance determinants", y = "Ciprofloxacin disk diffusion measurement (mm)", 
             size = "Number of strains", linetype = "Disk diffusion breakpoints", colour = "Phenotype") +
        scale_linetype_manual(values = c("solid", "dotted")) +
        scale_x_upset() +
        theme_combmatrix(combmatrix.panel.line.size = 0.8)
    }
  }
}
  • upset_bar_plot to create percent stacked bar charts + UpSet plots for AST measurements and fluoroquinolone resistance mechanisms
# create percent stacked barchart + UpSet plots for AST measurements and fluoroquinolone resistance mechanisms
upset_bar_plot <- function(df, x) {
# df - dataframe; x - Flq resistance mechanism column
  
  require(dplyr, ggplot2, ggupset)
  
  df %>%
    group_by(strain, Measurement) %>% 
    # !! sym() allows one to pass a column name as an argument
    summarize(res_mech_list = list(!! sym(x)), Resistance.phenotype) %>%
    ggplot(aes(res_mech_list, Measurement, fill = Resistance.phenotype)) + geom_bar(position="fill", stat="identity") +
    labs(x = "Fluoroquinolone resistance determinants", y = "% phenotype",
         fill = "Phenotype") +
    scale_x_upset() +
    theme_combmatrix(combmatrix.panel.line.size = 0.8)
}
  • group_res_mech to group resistance determinants
# function to group some resistance determinants
group_res_mech <- function(df, x) {
# df - data frame; x - column(s) with resistance determinanats
# {{ }} and := allows one to pass a column name as a function parameter
  
  require(dplyr)
  
  rare_res_qnr_genes <- c("qnrB7", "qnrE2", "qnrS2", "qnrVC1", "qnrVC6")

  df %>%
    # based on statistical analysis (chunk proportion), combine QRDR mutations to have just 4 types (GyrA-83, GyrA-87, ParC-80, ParC-84)
    # except GyrA-87H which is found in one strain and is sensitive
    mutate({{ x }} := replace({{ x }}, startsWith({{ x }}, "GyrA-83"), "GyrA-83")) %>%
    mutate({{ x }} := case_when(startsWith({{ x }}, "GyrA-87") & {{ x }} != "GyrA-87H" ~ "GyrA-87",
                                 TRUE ~ {{ x }})) %>%
    mutate({{ x }} := replace({{ x }}, startsWith({{ x }}, "ParC-80"), "ParC-80")) %>%
    mutate({{ x }} := replace({{ x }}, startsWith({{ x }}, "ParC-84"), "ParC-84")) %>%
    # also combine the following acquired genes
    mutate({{ x }} := replace({{ x }}, startsWith({{ x }}, "qepA2"), "qepA2")) %>%
    mutate({{ x }} := replace({{ x }}, startsWith({{ x }}, "qnrA1"), "qnrA1")) %>%
    mutate({{ x }} := replace({{ x }}, startsWith({{ x }}, "qnrB1"), "qnrB1")) %>%
    mutate({{ x }} := replace({{ x }}, startsWith({{ x }}, "qnrB19"), "qnrB19")) %>%
    mutate({{ x }} := replace({{ x }}, startsWith({{ x }}, "qnrB2"), "qnrB2")) %>%
    mutate({{ x }} := replace({{ x }}, startsWith({{ x }}, "qnrS1"), "qnrS1")) %>%
    mutate({{ x }} := replace({{ x }}, grepl(paste(rare_res_qnr_genes, collapse = "|"), {{ x }}), "rare_res_qnr"))
}

Load required libraries

library(here)
library(tidyverse)
library(scales)
library(ggupset)
library(RColorBrewer)
library(collapse)
library(DT)
library(naniar)

Load antibiogram data and filter for ciprofloxacin

# read antibiogram data
antibiogram <- read.delim(file = "antibiogram_combined.tsv", 
                                header = TRUE, sep = "\t")

# filter for ciprofloxacin
# edit: remove Omp mutations from analysis as based on violin + upset plots, they
# do not seem to contribute to ciprofloxacin resistance
cipro_antibiogram <- antibiogram %>%
  filter(Antibiotic == "ciprofloxacin") %>%
  select(strain:ST, Flq_mutations, Flq_acquired, Antibiotic:Measurement.Round) %>%
  mutate(Resistance.phenotype = case_when(grepl('MIC.*$', Laboratory.Typing.Method) & Measurement <= 0.25 ~ 
                                            "susceptible",
                                          grepl('MIC.*$', Laboratory.Typing.Method) & Testing.standard == 
                                            "EUCAST" & Measurement > 0.5 ~ "resistant",
                                          grepl('MIC.*$', Laboratory.Typing.Method) & Testing.standard == 
                                            "CLSI" & Measurement >= 1 ~ "resistant",
                                          Laboratory.Typing.Method == "Disk diffusion" & Testing.standard == 
                                            "EUCAST" & Measurement >= 25 ~ "susceptible",
                                          Laboratory.Typing.Method == "Disk diffusion" & Testing.standard == 
                                            "EUCAST" & Measurement < 22 ~ "resistant",
                                          Laboratory.Typing.Method == "Disk diffusion" & Testing.standard == 
                                            "CLSI" & Measurement >= 26 ~ "susceptible",
                                          Laboratory.Typing.Method == "Disk diffusion" & Testing.standard == 
                                            "CLSI" & Measurement <= 21 ~ "resistant",
                                          TRUE ~ "intermediate")) %>%
  # filter out strains with measurement of 0.12 and 0.75 (no such MIC measurements)
  filter(Measurement != 0.12 & Measurement != 0.75)

#write_delim(cipro_antibiogram, file = "cipro_antibiogram.tsv", delim = "\t")

# combine antibiogram with mutations not in kleborate data file
no_mut_kleborate <- read.delim(file = "mutations_not_in_kleborate.tsv",
                               header = TRUE, sep = "\t")

# combine antibiogram with mutations not in kleborate data file
cipro_antibiogram <- cipro_antibiogram %>%
  left_join(no_mut_kleborate, by = c("strain")) %>%
  mutate(Flq_mutations = case_when(Flq_mutations.y %in% NA ~ Flq_mutations.x, 
                                   Flq_mutations.x == "-" & Flq_mutations.y %in% NA ~ Flq_mutations.x,
                                   Flq_mutations.x == "-" ~ Flq_mutations.y,
                                   Flq_mutations.x != "-" & !is.na(Flq_mutations.y) & !Flq_mutations.x %in% Flq_mutations.y ~
                                     paste(Flq_mutations.x, Flq_mutations.y, sep = ";"),
                                   TRUE ~ Flq_mutations.x)) %>%
  select(- c(Flq_mutations.x, Flq_mutations.y)) %>%
  relocate(Flq_mutations, .after = ST)

# separate mutations and check for duplicates then combine them again into the Flq_mutations column
cipro_antibiogram <- cipro_antibiogram %>%
  pivot_longer(Flq_mutations, names_to = "Flq.resistance.mech", 
               values_to = "Resistance.mech.type") %>%
  separate(Resistance.mech.type, into = c("type_A", "type_B", "type_C", "type_D", "type_E", "type_F", "type_G", "type_H", "type_I"), 
           sep = "[;]")

cipro_antibiogram <- cipro_antibiogram %>%
  pivot_longer(cols = names(cipro_antibiogram)[27:35], values_to = "Resistance.mech.type") %>%
  drop_na(Resistance.mech.type) %>%
  select(-name) %>%
  filter(!duplicated(cbind(strain, Resistance.mech.type))) %>%
  pivot_wider(names_from = Flq.resistance.mech, values_from = Resistance.mech.type,
              values_fn = function(x) paste(x, collapse=";")) %>%
  relocate(Flq_mutations, .before = Flq_acquired)

Summaries

Sample count per dataset

cipro_antibiogram %>%
  group_by(index) %>%
  ggplot(aes(x = index)) +
  geom_bar(fill = "#ff8000", alpha = 0.6) +
  geom_text(aes(label = ..count..), stat = "count", nudge_y = 60) +
  labs(title = "Total sample count per dataset", y = "Total sample count", fill = "Lab typing method") +
  theme(axis.title.x = element_blank(), axis.text.x = element_text(angle = 45, hjust = 1))

Total sample counts vs measurements for different lab typing methods

MIC (broth dilution)

CLSI
cipro_antibiogram %>%
  group_by(Laboratory.Typing.Method) %>%
  filter(Laboratory.Typing.Method == "MIC (broth dilution)") %>%
  filter(Testing.standard == "CLSI") %>% 
  ggplot(aes(x = factor(Measurement), fill = index)) + geom_bar() + 
  # CLSI cut-off of 1.0 mg/L is resistant
  geom_vline(aes(xintercept = 5, linetype = "resistant"), alpha = 0.6, color = "black") +
  # CLSI cut-off of 0.25 mg/L is susceptible
  geom_vline(aes(xintercept = 3, linetype = "susceptible"), color = "black") +
  labs(title = "Total sample count at various MIC (broth dilution) measurements", x = "Measurement (mg/L)", y = "Frequency", fill = "Dataset", 
       linetype = "MIC breakpoints") +
  scale_linetype_manual(values = c("solid", "dotted")) 

EUCAST
cipro_antibiogram %>%
  group_by(Laboratory.Typing.Method) %>%
  filter(Laboratory.Typing.Method == "MIC (broth dilution)") %>%
  filter(Testing.standard == "EUCAST") %>% 
  ggplot(aes(x = factor(Measurement), fill = index)) + geom_bar() + 
  # EUCAST cut-off of 1.0 mg/L is resistant
  geom_vline(aes(xintercept = 8, linetype = "resistant"), alpha = 0.6, color = "black") +
  # EUCAST cut-off of 0.25 mg/L is susceptible
  geom_vline(aes(xintercept = 5, linetype = "susceptible"), color = "black") +
  labs(title = "Total sample count at various MIC (broth dilution) measurements", x = "Measurement (mg/L)", y = "Frequency", fill = "Dataset", 
       linetype = "MIC breakpoints") +
  scale_linetype_manual(values = c("solid", "dotted")) 

MIC (agar dilution)

EUCAST
# only EUCAST used as testing standard
cipro_antibiogram %>%
  group_by(Laboratory.Typing.Method) %>%
  filter(Laboratory.Typing.Method == "MIC (agar dilution)") %>% 
  ggplot(aes(x = factor(Measurement), fill = index)) + geom_bar() + 
  # EUCAST cut-off of 1.0 mg/L is resistant
  geom_vline(aes(xintercept = 6, linetype = "resistant"), alpha = 0.6, color = "black") +
  # EUCAST cut-off of 0.25 mg/L is susceptible
  geom_vline(aes(xintercept = 4, linetype = "susceptible"), color = "black") +
  labs(title = "Total sample count at various MIC (agar dilution) measurements", x = "Measurement (mg/L)", y = "Frequency", fill = "Dataset", 
       linetype = "MIC breakpoints") +
  # round off x-axis labels to 2dp
  scale_x_discrete(labels = function(x) round(as.numeric(x), digits = 2)) +
  scale_linetype_manual(values = c("solid", "dotted")) 

Disk diffusion

CLSI
cipro_antibiogram %>%
  group_by(Laboratory.Typing.Method) %>%
  filter(Laboratory.Typing.Method == "Disk diffusion") %>%
  filter(Testing.standard == "CLSI") %>%
  ggplot(aes(Measurement, fill = index)) + geom_bar() + 
  # CLSI cut-off of 21mm is resistant
  geom_vline(aes(xintercept = 21, linetype = "resistant"), color = "black") +
  # CLSI cut-off of 26mm is susceptible
  geom_vline(aes(xintercept = 26, linetype = "susceptible"), color = "black") +
  labs(title = "Total sample count at various disk diffusion measurements", x = "Measurement (mm)", y = "Frequency", fill = "Dataset", 
       linetype = "Disk diffusion \nbreakpoints") +
  scale_linetype_manual(values = c("solid", "dotted"))

EUCAST
cipro_antibiogram %>%
  group_by(Laboratory.Typing.Method) %>%
  filter(Laboratory.Typing.Method == "Disk diffusion") %>%
  filter(Testing.standard == "EUCAST") %>%
  ggplot(aes(Measurement, fill = index)) + geom_bar() + 
  # EUCAST cut-off of 21mm is resistant
  geom_vline(aes(xintercept = 21, linetype = "resistant"), color = "black") +
  # CLSI cut-off of 25mm is susceptible
  geom_vline(aes(xintercept = 25, linetype = "susceptible"), color = "black") +
  labs(title = "Total sample count at various disk diffusion measurements", x = "Measurement (mm)", y = "Frequency", fill = "Dataset", 
       linetype = "Disk diffusion \nbreakpoints") +
  scale_linetype_manual(values = c("solid", "dotted"))

UpSet plots for different lab typing methods to visualise resistance mechanism combinations

# combine Flq_mutations and Flq_acquired into one column
# edit: remove Omp mutations from analysis as based on violin + upset plots, they
# do not seem to contribute to ciprofloxacin resistance
cipro_res_mech <- cipro_antibiogram %>%
  pivot_longer(Flq_mutations:Flq_acquired, names_to = "Flq.resistance.mech", 
               values_to = "Resistance.mech.type") %>%
  separate(Resistance.mech.type, into = c("type_A", "type_B", "type_C"), sep = "[;]")

cipro_res_mech <- cipro_res_mech %>%
  pivot_longer(cols = names(cipro_res_mech)[26:28], values_to = "Resistance.mech.type") %>%
  drop_na(Resistance.mech.type) %>%
  select(-name) 

# group gyrA, parC, and qnr genes
cipro_res_mech <- cipro_res_mech %>%
  group_res_mech(Resistance.mech.type)

# explicitly label strains with wildtype QRDR (gyrA/parC) and no acquired genes as such
cipro_res_mech <- cipro_res_mech %>%
  mutate(Resistance.mech.type = replace(Resistance.mech.type, Flq.resistance.mech=="Flq_mutations" & Resistance.mech.type=="-", "wt_QRDR")) %>%
  mutate(Resistance.mech.type = replace(Resistance.mech.type, Flq.resistance.mech=="Flq_acquired" & Resistance.mech.type=="-", "no_acquired_genes"))

Summary bar plot of resistance mechanisms

Count table

# count number of occurrences of the different fluoroquinolone resistance determinants
summ_res_mech <- cipro_res_mech %>%
  filter(Resistance.mech.type != "wt_QRDR" & Resistance.mech.type != "no_acquired_genes") %>%
  group_by(Flq.resistance.mech, Resistance.mech.type) %>%
  summarise(count = n()) 

DT::datatable(summ_res_mech,
              width = "50%",
              extensions = "FixedHeader",
              rownames = FALSE,
              options = list(paging=TRUE, fixedHeader=TRUE, scrollX=TRUE))

Summary bar plot

summ_res_mech %>%
  ggplot(aes(Resistance.mech.type, count, fill = Flq.resistance.mech)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = count), stat = "identity", nudge_y = 60) +
  labs(title = "Total sample count per resistance determinant", x = "Resistance mechanism",
       y = "Total sample count", fill = "Fluoroquinolone resistance \nmechanism type") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) + 
  scale_fill_brewer(palette = "Accent")

UpSet plots

Violin plots

MIC (broth dilution)
CLSI
cipro_res_mech %>%
  filter(!(Resistance.mech.type == "no_acquired_genes")) %>% # exclude these rows as not informative
  filter(Laboratory.Typing.Method == "MIC (broth dilution)") %>%
  filter(Testing.standard == "CLSI") %>%
  upset_violin_plot("Resistance.mech.type", "Laboratory.Typing.Method", "Testing.standard") +
  labs(title = "Fluoroquinolone resistance determinant combinations at various MIC (broth dilution) measurements")

EUCAST
cipro_res_mech %>%
  filter(!(Resistance.mech.type == "no_acquired_genes")) %>% # exclude these rows as not informative
  filter(Laboratory.Typing.Method == "MIC (broth dilution)") %>%
  filter(Testing.standard == "EUCAST") %>%
  upset_violin_plot("Resistance.mech.type", "Laboratory.Typing.Method", "Testing.standard") +
  labs(title = "Fluoroquinolone resistance determinant combinations at various MIC (broth dilution) measurements") 

MIC (agar dilution)
EUCAST
# only EUCAST used as testing standard
cipro_res_mech %>%
  filter(!(Resistance.mech.type == "no_acquired_genes")) %>% # exclude these rows as not informative
  filter(Laboratory.Typing.Method == "MIC (agar dilution)") %>%
  upset_violin_plot("Resistance.mech.type", "Laboratory.Typing.Method", "Testing.standard") +
  labs(title = "Fluoroquinolone resistance determinant combinations\nat various MIC (agar dilution) measurements") 

Disk diffusion
CLSI
cipro_res_mech %>%
  filter(!(Resistance.mech.type == "no_acquired_genes")) %>% # exclude these rows as not informative
  filter(Laboratory.Typing.Method == "Disk diffusion") %>%
  filter(Testing.standard == "CLSI") %>%
  upset_violin_plot("Resistance.mech.type", "Laboratory.Typing.Method", "Testing.standard") 

EUCAST
cipro_res_mech %>%
  filter(!(Resistance.mech.type == "no_acquired_genes")) %>% # exclude these rows as not informative
  filter(Laboratory.Typing.Method == "Disk diffusion") %>%
  filter(Testing.standard == "EUCAST") %>%
  upset_violin_plot("Resistance.mech.type", "Laboratory.Typing.Method", "Testing.standard")

Percent barcharts

MIC (broth dilution)
CLSI
cipro_res_mech %>%
  filter(!(Resistance.mech.type == "no_acquired_genes")) %>% # exclude these rows as not informative
  filter(Laboratory.Typing.Method == "MIC (broth dilution)") %>%
  filter(Testing.standard == "CLSI") %>%
  upset_bar_plot("Resistance.mech.type") +
  labs(title = "Phenotypes for fluoroquinolone resistance determinant combinations")

EUCAST
cipro_res_mech %>%
  filter(!(Resistance.mech.type == "no_acquired_genes")) %>% # exclude these rows as not informative
  filter(Laboratory.Typing.Method == "MIC (broth dilution)") %>%
  filter(Testing.standard == "EUCAST") %>%
  upset_bar_plot("Resistance.mech.type") +
  labs(title = "Phenotypes for fluoroquinolone resistance determinant combinations")

MIC (agar dilution)
EUCAST
# only EUCAST used as testing standard
cipro_res_mech %>%
  filter(!(Resistance.mech.type == "no_acquired_genes")) %>% # exclude these rows as not informative
  filter(Laboratory.Typing.Method == "MIC (agar dilution)") %>%
  upset_bar_plot("Resistance.mech.type") +
  labs(title = "Phenotypes for fluoroquinolone resistance determinant combinations")

Disk diffusion
CLSI
cipro_res_mech %>%
  filter(!(Resistance.mech.type == "no_acquired_genes")) %>% # exclude these rows as not informative
  filter(Laboratory.Typing.Method == "Disk diffusion") %>%
  filter(Testing.standard == "CLSI") %>%
  upset_bar_plot("Resistance.mech.type") +
  labs(title = "Phenotypes for fluoroquinolone resistance\ndeterminant combinations")

EUCAST
cipro_res_mech %>%
  filter(!(Resistance.mech.type == "no_acquired_genes")) %>% # exclude these rows as not informative
  filter(Laboratory.Typing.Method == "Disk diffusion") %>%
  filter(Testing.standard == "EUCAST") %>%
  upset_bar_plot("Resistance.mech.type") +
  labs(title = "Phenotypes for fluoroquinolone resistance\ndeterminant combinations")

ST analysis

Resistant strains displaying fluoroquinolone resistance mechanisms

ST count

# count STs with resistant phenotype
top_35_st_res <- cipro_antibiogram %>%
  filter(Resistance.phenotype == "resistant") %>%
  filter(Flq_mutations != "-" | Flq_acquired != "-") %>%
  group_by(ST) %>%
  summarise(sample_count = n()) %>%
  as.data.frame() %>%
  arrange(desc(sample_count)) %>%
  head(35) %>%
  mutate(ST = factor(ST, levels = ST, ordered = TRUE))

DT::datatable(top_35_st_res, 
              width = "50%",
              extensions = "FixedHeader",
              rownames = FALSE,
              options = list(paging=TRUE, fixedHeader=TRUE, scrollX=TRUE))

Summary bar plot

# Blues palette with 9 colors
colour_palette <- brewer.pal(9, "Blues") 

# Add more colors to this palette :
colour_palette <- colorRampPalette(colour_palette)(35)

top_35_st_res %>%
  ggplot(aes(y = ST, x = sample_count, fill = ST, label = sample_count)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(hjust = -0.5) +
  scale_y_discrete(limits = rev) +
  ggtitle("Top STs with fluoroquinolone resistance") +
  labs(x = "Sample count") +
  # reverse order of colour palette
  scale_fill_manual(values = rev(colour_palette)) +
  theme_minimal() +
  theme(legend.position = "none",
  axis.text = element_text(size = 10)) +
  coord_cartesian(clip = "off")

Resistant STs associated with specific fluoroquinolone resistance mechanisms

Proportion table
st_totals <- cipro_antibiogram %>%
  group_by(ST) %>%
  summarise(N=n())

st_res_mech <- cipro_res_mech %>%
  filter(Resistance.phenotype == "resistant") %>%
  filter(Resistance.mech.type != "wt_QRDR", Resistance.mech.type != "no_acquired_genes") %>%
  count(ST, Resistance.mech.type) %>% 
  left_join(st_totals) %>% # ST total counts
  mutate(prop = n/N) %>% # proportion per ST
  mutate(se = sqrt(prop*(1-prop)/N)) %>%
  mutate(p_lowerCI = prop-1.96*se, p_upperCI = prop+1.96*se) %>%
  mutate(p_lowerCI = replace(p_lowerCI, p_lowerCI<0, 0)) %>%
  mutate(p_upperCI = replace(p_upperCI, p_upperCI>1, 1)) %>%
  select(-se) %>%
  mutate(ST = factor(ST, levels = unique(ST)))

DT::datatable(st_res_mech, 
              extensions = "FixedHeader",
              rownames = FALSE,
              options = list(paging=TRUE, fixedHeader=TRUE, scrollX=TRUE))
Heatmap of the proportions of resistance determinants per ST
st_res_mech %>%
  filter(ST %in% st_res_mech$ST[st_res_mech$prop>0.1 & st_res_mech$N>50]) %>%
  ggplot(aes(x=ST, y = Resistance.mech.type)) +
  geom_tile(aes(fill=prop)) +
  labs(subtitle = "Fluoroquinolone resistance mechanisms in top resistant STs", 
       y = "Fluoroquinolone resistance mechanisms", fill = "Proportion per \nST") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8, colour = "black")) + 
  scale_fill_gradient(low = "white", high = "red")

Heatmap/UpSet plot showing %proportion of resistance determinant combinations per ST
Proportion table
# separate resistance determinants
cipro_res_mech_prop <- cipro_antibiogram %>%
  separate(Flq_mutations, into = c("mut_A", "mut_B", "mut_C"), sep = "[;]") %>%
  separate(Flq_acquired, into = c("acq_A", "acq_B", "acq_C"), sep = "[;]") 

# group determinants based on statistical analysis
cipro_res_mech_prop <- cipro_res_mech_prop %>%
  group_res_mech(mut_A) %>%
  group_res_mech(mut_B) %>%
  group_res_mech(mut_C) %>%
  group_res_mech(acq_A) %>%
  group_res_mech(acq_B) %>%
  group_res_mech(acq_C) 

# combine previously separated resistance determinants columns
cipro_res_mech_prop <- cipro_res_mech_prop %>%
  unite("Flq_mutations", mut_A, mut_B, mut_C, sep = ";", na.rm = TRUE) %>%
  unite("Flq_acquired", acq_A, acq_B, acq_C, sep = ";", na.rm = TRUE) 

# combine all the resistance determinants columns using a ";" excluding rows without
# mutations 
cipro_res_mech_prop <- cipro_res_mech_prop %>%
  # replace "-" with NA
  replace_with_na(replace = list(Flq_mutations = "-", Flq_acquired = "-")) %>%
  unite("res_mech", Flq_mutations:Flq_acquired, sep = ";", remove = FALSE, na.rm = TRUE) %>%
  # replace NA with "-"
  replace_na(list(Flq_mutations = "-", Flq_acquired = "-")) 

# replace empty cells with "-"
cipro_res_mech_prop$res_mech <- sub("^$", "-", cipro_res_mech_prop$res_mech)

# calculate proportions of different combinations
st_res_mech_prop <- cipro_res_mech_prop %>%
  filter(Resistance.phenotype == "resistant") %>%
  filter(res_mech != "-") %>%
  group_by(ST) %>%
  count(ST, res_mech) %>%
  left_join(st_totals) %>% # ST total counts
  mutate(prop = n/N) %>% # proportion per ST
  mutate(se = sqrt(prop*(1-prop)/N)) %>%
  mutate(p_lowerCI = prop-1.96*se, p_upperCI = prop+1.96*se) %>%
  mutate(p_lowerCI = replace(p_lowerCI, p_lowerCI<0, 0)) %>%
  mutate(p_upperCI = replace(p_upperCI, p_upperCI>1, 1)) %>%
  select(-se) %>%
  mutate(ST = factor(ST, levels = unique(ST))) %>%
  arrange(res_mech) 

DT::datatable(st_res_mech_prop, 
              extensions = "FixedHeader",
              rownames = FALSE,
              options = list(paging=TRUE, fixedHeader=TRUE, scrollX=TRUE))
Heatmap of different combinations per ST
# create heatmap/upset plot of different combinations per ST
st_res_mech_prop %>%
  filter(ST %in% st_res_mech$ST[st_res_mech$prop>0.1 & st_res_mech$N>50]) %>%
  summarise(ST, res_mech, prop) %>%
  ggplot(aes(x=res_mech, y=ST, fill=prop)) +
  geom_tile() +
  axis_combmatrix(sep = ";") +
  theme_combmatrix(combmatrix.panel.line.size = 0.8) + 
  scale_fill_gradient(low = "white", high = "red") +
  labs(x = "Resistance determinants", fill = "Proportion per\nST",
       title = "Proportions of fluoroquinolone resistance determinant combinations per ST")

Resistant strains displaying no fluoroquinolone resistance mechanisms

Count table

no_res_mech <- cipro_antibiogram %>%
  filter(Flq_mutations == "-", Flq_acquired == "-") %>%
  filter(Resistance.phenotype == "resistant")

st_no_res_mech <- no_res_mech %>%
  group_by(ST) %>%
  summarise(sample_count = n()) %>%
  as.data.frame() %>%
  arrange(desc(sample_count)) %>%
  mutate(ST = factor(ST, levels = ST, ordered = TRUE))

DT::datatable(st_no_res_mech,
              width = "50%",
              extensions = "FixedHeader",
              rownames = FALSE,
              options = list(paging=TRUE, fixedHeader=TRUE, scrollX=TRUE))

Bar plot

st_no_res_mech %>%
  filter(sample_count > 2) %>%
  ggplot(aes(ST, sample_count)) +
  geom_bar(stat = "identity", fill = "blue", alpha = 0.7) +
  labs(title = "Resistant STs with no fluoroquinolone resistance mechanisms", y = "Count") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Susceptible strains displaying fluoroquinolone resistance mechanisms

Count table

sus_res_mech <- cipro_res_mech %>%
  filter(Resistance.phenotype == "susceptible") %>%
  filter(Resistance.mech.type != "wt_QRDR", Resistance.mech.type != "no_acquired_genes")

st_sus_res_mech <- sus_res_mech %>%
  group_by(ST) %>%
  summarise(res_mech = Resistance.mech.type) %>%
  as.data.frame() %>%
  count(ST, res_mech, sort = TRUE) %>%
  mutate(ST = factor(ST, levels = unique(ST)))

DT::datatable(st_sus_res_mech,
              width = "50%",
              extensions = "FixedHeader",
              rownames = FALSE,
              options = list(paging=TRUE, fixedHeader=TRUE, scrollX=TRUE))

Bar plot

st_sus_res_mech %>%
  filter(n > 1) %>%
  ggplot(aes(ST, n, fill = res_mech)) +
  geom_bar(stat = "identity") +
  labs(title = "Fluoroquinolone resistance mechanisms employed by the top susceptible STs", y = "Count", fill = "Fluoroquinolone resistance\nmechanisms") +
  # put legend in one column
  guides(fill = guide_legend(ncol = 1)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

UpSet plots

MIC (broth dilution)
CLSI
sus_res_mech %>%
  filter(Laboratory.Typing.Method == "MIC (broth dilution)") %>%
  filter(Testing.standard == "CLSI") %>%
  upset_violin_plot("Resistance.mech.type", "Laboratory.Typing.Method", "Testing.standard") +
  labs(title = "Fluoroquinolone resistance determinant combinations\nat various MIC (broth dilution) measurements") 

EUCAST
sus_res_mech %>%
  filter(Laboratory.Typing.Method == "MIC (broth dilution)") %>%
  filter(Testing.standard == "EUCAST") %>%
  upset_violin_plot("Resistance.mech.type", "Laboratory.Typing.Method", "Testing.standard") +
  labs(title = "Fluoroquinolone resistance determinant combinations\nat various MIC (broth dilution) measurements") 

MIC (agar dilution)
EUCAST
# only EUCAST used as testing standard
sus_res_mech %>%
  filter(Laboratory.Typing.Method == "MIC (agar dilution)") %>%
  upset_violin_plot("Resistance.mech.type", "Laboratory.Typing.Method", "Testing.standard") +
  labs(title = "Fluoroquinolone resistance determinant combinations\nat various MIC (agar dilution) measurements") 

Disk diffusion
EUCAST
# only EUCAST has values
sus_res_mech %>%
  filter(Laboratory.Typing.Method == "Disk diffusion") %>%
  filter(Testing.standard == "EUCAST") %>%
  upset_violin_plot("Resistance.mech.type", "Laboratory.Typing.Method", "Testing.standard")

Statistical analysis

Proportion test

# calculate frequencies of different resistance mechanisms for the different phenotypes
# including numbers for the wildtype groups for comparison to compare resistance frequency 
# amongst strains with the determinant vs strains without
mech_freq <- cipro_res_mech %>%
  group_by(Resistance.mech.type, Resistance.phenotype) %>%
  filter(Resistance.phenotype != "intermediate") %>%
  summarise(n = n()) %>%
  ungroup() %>%
  group_by(Resistance.mech.type) %>%
  pivot_wider(names_from = Resistance.phenotype, values_from = n) 

# convert na values to 0
mech_freq[is.na(mech_freq)] <- 0

# find totals of different resistance mechanisms
mech_freq <- mech_freq %>%
  mutate(total = sum(c(resistant, susceptible)))


# compare each gyrA/parC SNP vs wildtype QRDR
qrdr_wt <- mech_freq %>% filter(Resistance.mech.type == "wt_QRDR")
qrdr_mut <- mech_freq %>% 
  filter(startsWith(Resistance.mech.type, "GyrA") | startsWith(Resistance.mech.type, "ParC")) %>%
  mutate(wt_res = qrdr_wt[2], wt_total=qrdr_wt[4]) # add columns with the wildtype counts for res & total
  
# run prop.test() to each row, comparing the resistant counts (x) with the total counts (n)
qrdr_prop_out <- dapply(as.matrix(qrdr_mut), MARGIN = 1, 
       FUN = function(y) broom::tidy(prop.test(x=c(as.numeric(y[2]), as.numeric(y[5])), n=c(as.numeric(y[4]), as.numeric(y[6])))))

# combine qrdr_mut and qrdr_prop_out
qrdr_mut_freq_prop <- cbind(qrdr_mut, qrdr_prop_out) %>%
  select(resistant, total, estimate1, estimate2, p.value) %>%
  mutate(p.value=as.numeric(p.value))

qrdr_mut_freq_prop

# specific mutations observed more than once, significantly associated with elevated resistance (vs wildtype QRDR)
view(qrdr_mut_freq_prop %>% filter(total>1) %>% filter(p.value < 0.05))

# the data provides evidence that ANY mutation at the 4 codons we track is associated with resistance, even though for some individual mutations we only have one observation:
# check gyrA83, there are 5 mutations observed at this codon (A, F, I, L, Y)
# 4 have large numbers (≥10) and significant p-values, gyrA-83A is observed only once but is also resistant
qrdr_mut_freq_prop %>% filter(startsWith(Resistance.mech.type,"GyrA-83"))

# Similar for GyrA-87, except that GyrA87-H is only seen in one strain and it is sensitive, so exclude this one (not a borderline phenotype)
qrdr_mut_freq_prop %>% filter(startsWith(Resistance.mech.type,"GyrA-87"))

# ParC-80 and ParC-84 look the same as for GyrA-83, ie all mutations at the site have same effect can be grouped together
qrdr_mut_freq_prop %>% filter(startsWith(Resistance.mech.type,"ParC-80"))
qrdr_mut_freq_prop %>% filter(startsWith(Resistance.mech.type,"ParC-84"))

# based on the statistical analysis, QRDR mutations will be updated to have just 4 types 
# (GyrA-83, GyrA-87, ParC-80, ParC-84) instead of 19 most of which are rare


# compare each Omp mutations vs wildtype Omp
#omp_wt <- mech_freq %>% filter(Resistance.mech.type == "wt_Omp")
#omp_mut <- mech_freq %>% 
  #filter(startsWith(Resistance.mech.type, "Omp")) %>%
  #mutate(wt_res = omp_wt[2], wt_total=omp_wt[4]) # add columns with the wildtype counts for res & total

# run prop.test() to each row, comparing the resistant counts (x) with the total counts (n)
#omp_prop_out <- dapply(as.matrix(omp_mut), MARGIN = 1, 
       #FUN = function(y) broom::tidy(prop.test(x=c(as.numeric(y[2]), as.numeric(y[5])), n=c(as.numeric(y[4]), as.numeric(y[6])))))

#omp_mut_freq_prop <- cbind(omp_mut, omp_prop_out) %>% 
  #select(resistant, total, estimate1, estimate2, p.value) %>%
  #mutate(p.value=as.numeric(p.value))

# all four mutations (truncation of either ompK35 or ompK36; and insertion of GD or TD in OmpK36, are all significantly associated with resistance)
#view(omp_mut_freq_prop)


# compare acquired vs no acquired genes
no_acquired <- mech_freq %>% filter(Resistance.mech.type == "no_acquired_genes")
acquired_genes <- mech_freq %>% 
  filter(startsWith(Resistance.mech.type, "qnr") | startsWith(Resistance.mech.type, "qep") ) %>%
  mutate(wt_res = no_acquired[2], wt_total=no_acquired[4]) # add columns with the wildtype counts for res & total

# run prop.test() to each row, comparing the resistant counts (x) with the total counts (n)
acquired_genes_prop_out <- dapply(as.matrix(acquired_genes), MARGIN = 1, 
       FUN = function(y) broom::tidy(prop.test(x=c(as.numeric(y[2]), as.numeric(y[5])), n=c(as.numeric(y[4]), as.numeric(y[6])))))

acquired_genes_freq_prop <- cbind(acquired_genes, acquired_genes_prop_out) %>% 
  select(resistant, total, estimate1, estimate2, p.value) %>%
  mutate(p.value=as.numeric(p.value))

view(acquired_genes_freq_prop)

# from the statistical analysis, the following can be combined:
# qepA2 (n=1, which is resistant) with qepA2* (n=17, all of which are resistant)
# qnrA1 and qnrA1^ (note the ^ means they are identical at protein level anyway) as all are resistant
# qnrB1.v1, qnrB1.v2, qnrB1.v2^ for similar reasons
# qnrB19, qnrB19^
# qnrB2.1, qnrB2.2
# qnrS1, qnrS1?, qnrS1*
# could also combine all the 'rare' genes that are only seen 1-2 times each but are always resistant:
# -> qnrB7, qnrE2, qnrS2, qnrVC1, qnrVC6

Session information

sessionInfo()
## R version 4.2.2 Patched (2022-11-10 r83330)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.1 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_GB.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_GB.UTF-8        LC_COLLATE=en_GB.UTF-8    
##  [5] LC_MONETARY=en_GB.UTF-8    LC_MESSAGES=en_GB.UTF-8   
##  [7] LC_PAPER=en_GB.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] naniar_0.6.1       DT_0.26.1          collapse_1.8.9     RColorBrewer_1.1-3
##  [5] ggupset_0.3.0      scales_1.2.1       forcats_0.5.2      stringr_1.4.1     
##  [9] dplyr_1.0.10       purrr_0.3.5        readr_2.1.3        tidyr_1.2.1       
## [13] tibble_3.1.8       ggplot2_3.4.0      tidyverse_1.3.2    here_1.0.1        
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.9          lubridate_1.9.0     assertthat_0.2.1   
##  [4] rprojroot_2.0.3     digest_0.6.30       utf8_1.2.2         
##  [7] R6_2.5.1            cellranger_1.1.0    backports_1.4.1    
## [10] visdat_0.5.3        reprex_2.0.2        evaluate_0.18      
## [13] highr_0.9           httr_1.4.4          pillar_1.8.1       
## [16] rlang_1.0.6         googlesheets4_1.0.1 readxl_1.4.1       
## [19] rstudioapi_0.14     jquerylib_0.1.4     rmarkdown_2.18     
## [22] labeling_0.4.2      googledrive_2.0.0   htmlwidgets_1.5.4  
## [25] munsell_0.5.0       broom_1.0.1         compiler_4.2.2     
## [28] modelr_0.1.10       xfun_0.35           pkgconfig_2.0.3    
## [31] htmltools_0.5.3     tidyselect_1.1.2    fansi_1.0.3        
## [34] crayon_1.5.2        tzdb_0.3.0          dbplyr_2.2.1       
## [37] withr_2.5.0         grid_4.2.2          jsonlite_1.8.3     
## [40] gtable_0.3.0        lifecycle_1.0.3     DBI_1.1.3          
## [43] magrittr_2.0.3      cli_3.4.1           stringi_1.7.8      
## [46] cachem_1.0.6        farver_2.1.1        fs_1.5.2           
## [49] xml2_1.3.3          bslib_0.4.1         ellipsis_0.3.2     
## [52] generics_0.1.2      vctrs_0.5.1         tools_4.2.2        
## [55] glue_1.6.2          crosstalk_1.2.0     hms_1.1.2          
## [58] parallel_4.2.2      fastmap_1.1.0       yaml_2.3.6         
## [61] timechange_0.1.1    colorspace_2.0-3    gargle_1.2.1       
## [64] rvest_1.0.3         knitr_1.41          haven_2.5.1        
## [67] sass_0.4.3